# libs ----
library(sf)
library(geomander)
library(tidyverse)
library(redist)
library(ppmf)
library(here)
library(gt)
library(kableExtra)

# helpers ----
# project functions used below (read_ppmf, add_geoid, agg, ...) come from here
source('R/00_custom_functions.R')

# constants ----
# run sf without the s2 backend
sf::sf_use_s2(FALSE)

# PPMF file locations are taken from environment variables
# (formerly hard-coded, e.g. "data-raw/ppmf_12.csv" and "data-raw/ppmf_04.csv")
ppmf_env <- Sys.getenv(c('ppmf19', 'ppmf12', 'ppmf4'))
ppmf19_path <- ppmf_env[['ppmf19']]
ppmf12_path <- ppmf_env[['ppmf12']]
ppmf4_path <- ppmf_env[['ppmf4']]

# states to process, by postal abbreviation
states <- c('AL', 'DE', 'LA', 'MS', 'NC', 'PA', 'SC', 'UT', 'WA')

# preallocated results: one slot per entry of `states`, in the same order
out <- vector('list', length(states))

# Run!
# For every state: aggregate the three PPMF files, join them onto the Census
# block table, and record the district each block is matched to.
for (i in seq_along(states)) {
    state <- states[[i]]

    # data ----
    # every PPMF file goes through the same steps; only the prefix handed to
    # add_pref() for its pop*/vap* columns differs
    ppmf19 <- read_ppmf(state, ppmf19_path) %>%
        add_geoid() %>%
        agg() %>%
        breakdown_geoid() %>%
        rename_with(~ add_pref(.x, pref = 'v19'), starts_with(c('pop', 'vap')))
    ppmf12 <- read_ppmf(state, ppmf12_path) %>%
        add_geoid() %>%
        agg() %>%
        breakdown_geoid() %>%
        rename_with(~ add_pref(.x, pref = 'v12'), starts_with(c('pop', 'vap')))
    ppmf4 <- read_ppmf(state, ppmf4_path) %>%
        add_geoid() %>%
        agg() %>%
        breakdown_geoid() %>%
        rename_with(~ add_pref(.x, pref = 'v4'), starts_with(c('pop', 'vap')))

    # comparison + all joined ----
    # Census block table with the three PPMF tables attached by GEOID;
    # columns whose names contain '.' are dropped to remove duplicates
    block <- create_block_table(state = state) %>%
        left_join(ppmf19, by = 'GEOID') %>%
        left_join(ppmf12, by = 'GEOID') %>%
        left_join(ppmf4, by = 'GEOID') %>%
        select(-contains('.'))
    # set missing block pop/vap to 0
    block[is.na(block)] <- 0
    # add block_group back (dropped by contains('.'))
    block <- block %>% breakdown_geoid()

    # district shapes from the Census ----
    shp_low <- tigris::state_legislative_districts(state, 'lower')
    shp_up <- tigris::state_legislative_districts(state, 'upper')
    shp_cd <- tigris::congressional_districts(state)

    # match blocks to districts by centroid; computed before mutate() so the
    # arguments are looked up as plain variables
    idx_low <- geo_match(from = block, to = shp_low, method = 'centroid')
    idx_up <- geo_match(from = block, to = shp_up, method = 'centroid')
    idx_cd <- geo_match(from = block, to = shp_cd, method = 'centroid')

    # store the matches as columns and drop the geometry
    block <- block %>%
        mutate(sld_low = idx_low, sld_up = idx_up, cd = idx_cd) %>%
        st_drop_geometry()

    out[[i]] <- block
}

saveRDS(out, file = here('data-raw/state_block_aggs.Rds'), compress = 'xz')


# District-level population totals ----
# For every state table in `out`, total the `pop`, `v19_pop`, `v12_pop` and
# `v4_pop` columns within each value of `sld_up`, `sld_low` and `cd`.
diffs <- lapply(out, function(state_blocks) {
    pop_cols <- c('pop', 'v19_pop', 'v12_pop', 'v4_pop')
    district_cols <- c(sld_up = 'sld_up', sld_low = 'sld_low', cd = 'cd')

    # one summary table per grouping column; the names of `district_cols`
    # become the names of the returned list
    lapply(district_cols, function(district_col) {
        state_blocks %>%
            group_by(across(all_of(district_col))) %>%
            summarize(across(all_of(pop_cols), sum))
    })
})

# label each state's result with its abbreviation and store it
names(diffs) <- states
saveRDS(diffs, file = here('data/numbers/diffs.Rds'))

# Summary stats ----
# reload the stored totals
diffs <- readRDS(here("data/numbers/diffs.Rds"))

# lookup from state abbreviation (names) to full state name (values)
statecode <- setNames(state.name, state.abb)

# Summarise, per state, how far a PPMF population column is from the Census
# population across congressional districts.
#
# Arguments:
#   tbl  list with one element per state; each element holds a `cd` table
#        containing a `pop` column and the column named by `das`. The list
#        names become the `state` column.
#   das  name of the column compared against `pop` (a single string).
#
# Returns one row per state with the min, median, mean, max and sd of
# `das - pop` over that state's `cd` rows, plus `N`, the number of rows.
# `state` values are recoded through the global `statecode` lookup.
#
# Stops with an error when `das` is not a single string or when a state's
# `cd` table lacks `pop` or the `das` column; otherwise a misspelled name
# would give Inf/NaN/NA rows accompanied only by warnings.
tbl_diff_cd <- function(tbl, das = "v19_pop") {
    stopifnot(is.character(das), length(das) == 1L, !is.na(das))

    per_state <- map(tbl, function(x) {
        missing_cols <- setdiff(c(das, "pop"), names(x$cd))
        if (length(missing_cols) > 0) {
            stop(
                "`cd` table is missing column(s): ",
                paste(missing_cols, collapse = ", "),
                call. = FALSE
            )
        }

        vec <- x$cd[[das]] - x$cd$pop

        tibble(
            min  = min(vec),
            med  = median(vec),
            mean = mean(vec),
            max  = max(vec),
            sd   = sd(vec),
            N    = length(vec)
        )
    })

    # map() + bind_rows(.id = ) is what the superseded map_dfr(.id = ) does
    bind_rows(per_state, .id = "state") %>%
        mutate(state = recode(state, !!!statecode))
}

# Render a summary table (such as the output of tbl_diff_cd()) as a LaTeX
# table via kbl(): booktabs layout, values shown with 0 digits and a comma as
# the big mark, and the seven columns relabelled with the headers below.
textab <- function(tbl) {
    header_labels <- c("State", "Min", "Median", "Mean", "Max", "SD", "CDs")

    kbl(
        tbl,
        format = "latex",
        booktabs = TRUE,
        col.names = header_labels,
        digits = 0,
        format.args = list(big.mark = ","),
        linesep = ""
    )
}


# used directly in the memo Rmd
# One LaTeX table per PPMF population column (`v4_pop`, `v12_pop`, `v19_pop`),
# each built by tbl_diff_cd() and formatted by textab(). The results are not
# assigned to anything, so these calls are kept as separate top-level
# expressions rather than being folded into a loop.
tbl_diff_cd(diffs, das = "v4_pop") %>% textab()
tbl_diff_cd(diffs, das = "v12_pop") %>% textab()
tbl_diff_cd(diffs, das = "v19_pop") %>% textab()





